package edu.osu.slate.relatedness.swwr.setup.graph;

import java.io.*;
import java.util.*;

import edu.osu.slate.relatedness.Configuration;
import edu.osu.slate.relatedness.swwr.data.ConvertIDToTitle;
import edu.osu.slate.relatedness.swwr.data.ConvertTitleToID;
import edu.osu.slate.relatedness.swwr.data.graph.IDIDRedirect;
import edu.osu.slate.relatedness.swwr.data.graph.IDVertexTranslation;
import edu.osu.slate.relatedness.swwr.data.graph.WikiGraph;
import edu.osu.slate.relatedness.swwr.data.graph.WikiInvGraph;

/**
 * This program builds the link graph files: it reads the Wikipedia
 * <code>pagelinks.sql</code> dump and writes a serialized {@link WikiGraph}
 * (.wgp, outbound links) and a serialized {@link WikiInvGraph}
 * (.iwgp, inbound links).
 * 
 * In the Graph creation pipeline, this program is:
 * <ul>
 *   <li>Preceded by {@link CreateIDToVertexFile}, {@link CreateTitleIDFiles} and {@link CreateRedirectFiles}
 *   <li>Followed by --none--
 * </ul>
 * 
 * @author weale
 * @version 1.0;alpha
 */
public class CreateGraphFiles {

  /* Configuration file used when none is given on the command line */
  private static final String DEFAULT_CONFIG_FILE =
      "/scratch/weale/data/config/enwiktionary/CreateMappings.xml";

  /* Verbose output flag (never assigned in this class, so it stays false) */
  private static boolean verbose;

  /* Name of the input file generated by Wikipedia (pagelinks.sql) */
  private static String inputFileName;

  /* Name of the input file holding the IDVertexTranslation object (.vid) */
  private static String vidFileName;

  /* Name of the input file generated by CreateRedirectFiles (.rdr) */
  private static String rdrFileName;

  /* Name of the input file holding the ConvertTitleToID object (.tid) */
  private static String tidFileName;

  /* Name of the output file (.wgp) */
  private static String graphFileName;

  /* Name of the output file (.iwgp) */
  private static String invertedFileName;

  /**
   * Derives every input and output file name from the values currently
   * held by {@link Configuration}.
   */
  private static void setFiles()
  {
    String typeAndDate = Configuration.type + "-" + Configuration.date + "-";

    String binaryDir = Configuration.baseDir + "/" +
                       Configuration.binaryDir + "/" +
                       Configuration.type + "/" +
                       Configuration.date + "/";

    String sourceDir = Configuration.baseDir + "/" +
                       Configuration.sourceDir + "/" +
                       Configuration.type + "/" +
                       Configuration.date + "/";

    /* All binary files share the same prefix and differ only by extension */
    String binaryPrefix = binaryDir + typeAndDate + Configuration.graph;

    inputFileName    = sourceDir + typeAndDate + "pagelinks.sql";
    vidFileName      = binaryPrefix + ".vid";
    tidFileName      = binaryPrefix + ".tid";
    rdrFileName      = binaryPrefix + ".rdr";
    graphFileName    = binaryPrefix + ".wgp";
    invertedFileName = binaryPrefix + ".iwgp";
  }

  /**
   * Reads the configuration, loads the ID/redirect/title tables, builds the
   * outbound and inbound adjacency lists from the pagelinks dump and writes
   * both graphs to disk.
   * 
   * @param args optional single argument: path of the configuration file.
   *             With any other argument count the built-in default path is used.
   * @throws IOException if the pagelinks dump cannot be read (or contains no
   *                     INSERT statement) or an output file cannot be written
   */
  public static void main(String[] args) throws IOException {

    if(args.length == 1)
    {
      Configuration.parseConfigurationFile(args[0]);
    }
    else
    {
      Configuration.parseConfigurationFile(DEFAULT_CONFIG_FILE);
    }

    setFiles();

    /* STEP 1
     * 
     * Initialize the ValidID list, Redirect List and ConvertTitleToID List
     */
    System.out.println("Initializing Valid IDs.");
    IDVertexTranslation vids = loadObject(vidFileName, IDVertexTranslation.class);

    System.out.println("Initializing Redirect List.");
    IDIDRedirect rdl = loadObject(rdrFileName, IDIDRedirect.class);

    /* Only the first object of the .tid file (title -> ID) is needed here;
     * the ConvertIDToTitle object that follows it in the file is not read. */
    System.out.println("Initializing Title/ID Table.");
    ConvertTitleToID tid = loadObject(tidFileName, ConvertTitleToID.class);

    /* STEP 2
     * 
     * Create from->to array and the inverted array to->from.
     * Rows stay null until a vertex receives its first edge.
     */
    int numVertices = vids.numVertices();
    System.out.println("Creating graph. Size = " + numVertices);

    int [][] graph = new int[numVertices][];
    int [][] igraph = new int[numVertices][];
    float [][] graphtrans = new float[numVertices][];
    float [][] igraphtrans = new float[numVertices][];

    /* STEP 3
     * 
     * Populate both graphs from the pagelinks dump.
     */
    if(verbose) {
      System.out.println("Initializing graph.");
    }
    readPageLinks(vids, rdl, tid, graph, igraph);

    /* STEP 4
     * 
     * Sort link arrays.
     * Also, give a uniform transition to the graph vertices.
     */
    for(int i = 0; i < graph.length; i++)
    {
      graphtrans[i] = sortAndWeight(graph[i]);
      igraphtrans[i] = sortAndWeight(igraph[i]);
    }//end: for(i)

    /* STEP 5
     * 
     * Print Object Files (.wgp) (.iwgp)
     */
    if(verbose) {
      System.out.println("Printing Files.");
    }

    writeObject(graphFileName, new WikiGraph(graph, graphtrans));
    writeObject(invertedFileName, new WikiInvGraph(igraph, igraphtrans));
  }

  /**
   * Deserializes the first object stored in the given file.
   * On any failure (missing file, corrupt stream, unexpected object type)
   * the problem is reported and the JVM exits with status 1.
   * 
   * @param fileName file to read
   * @param type     expected class of the stored object
   * @return the deserialized object
   */
  private static <T> T loadObject(String fileName, Class<T> type)
  {
    ObjectInputStream objIn = null;
    try
    {
      objIn = new ObjectInputStream(new FileInputStream(fileName));
      return type.cast(objIn.readObject());
    }
    catch(Exception e)
    {
      System.out.println("Problem with file: " + fileName);
      System.err.println("Cause: " + e);
      System.exit(1);
      return null; // not reached; required by the compiler
    }
    finally
    {
      if(objIn != null)
      {
        try
        {
          objIn.close();
        }
        catch(IOException ignored)
        {
          // the object has already been read; a failed close loses nothing
        }
      }
    }
  }

  /**
   * Serializes one object into the given file, closing the stream even
   * when writing fails.
   * 
   * @param fileName file to create or overwrite
   * @param obj      object to serialize
   * @throws IOException if the file cannot be opened or written
   */
  private static void writeObject(String fileName, Object obj) throws IOException
  {
    ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(fileName));
    try
    {
      out.writeObject(obj);
    }
    finally
    {
      out.close();
    }
  }

  /**
   * Scans the pagelinks dump and adds every resolvable namespace-0 link to
   * the outbound and inbound adjacency lists.
   * 
   * Reading starts at the first line containing "INSERT INTO" and stops at
   * the first blank line after it, or at end of file.
   * 
   * @param vids   valid ID to vertex translation
   * @param rdl    redirect ID resolution
   * @param tid    title to ID lookup
   * @param graph  outbound adjacency lists, indexed by 'from' vertex
   * @param igraph inbound adjacency lists, indexed by 'to' vertex
   * @throws IOException if the dump cannot be read or holds no INSERT statement
   */
  private static void readPageLinks(IDVertexTranslation vids, IDIDRedirect rdl,
                                    ConvertTitleToID tid,
                                    int[][] graph, int[][] igraph) throws IOException
  {
    // NOTE(review): FileReader decodes with the platform default charset.
    // Left unchanged because titles presumably must decode the same way as
    // in the program that built the .tid table -- confirm before switching
    // to an explicit charset.
    Scanner in = new Scanner(new FileReader(inputFileName));
    try
    {
      /* Skip the dump header up to the first INSERT statement */
      String str = null;
      while(str == null && in.hasNextLine())
      {
        String candidate = in.nextLine();
        if(candidate.indexOf("INSERT INTO") != -1)
        {
          str = candidate;
        }
      }

      if(str == null)
      {
        // Scanner swallows read errors; surface one if that is why we stopped
        IOException readError = in.ioException();
        if(readError != null)
        {
          throw readError;
        }
        throw new IOException("No INSERT INTO statement found in: " + inputFileName);
      }

      while(str != null && !str.trim().equals(""))
      {
        addLinksFromLine(str, vids, rdl, tid, graph, igraph);
        str = in.hasNextLine() ? in.nextLine() : null;
      }//end: while()

      IOException readError = in.ioException();
      if(readError != null)
      {
        throw readError;
      }
    }
    finally
    {
      in.close();
    }
  }

  /**
   * Parses one dump line of the form
   * <code>INSERT INTO ... VALUES (from,ns,'title'),(from,ns,'title');</code>
   * and records each link whose two ends resolve to a vertex.
   * Lines without a parenthesised tuple list are ignored.
   */
  private static void addLinksFromLine(String str,
                                       IDVertexTranslation vids, IDIDRedirect rdl,
                                       ConvertTitleToID tid,
                                       int[][] graph, int[][] igraph)
  {
    // Slice between the first '(' and the last ')' so that the final tuple
    // keeps its closing quote, whatever follows the ')' (";", whitespace).
    int start = str.indexOf('(');
    int end = str.lastIndexOf(')');
    if(start == -1 || end <= start)
    {
      return;
    }

    // Split the String into the page information
    String [] arr = str.substring(start + 1, end).split("\\),\\(");
    for(int i = 0; i < arr.length; i++)
    {
      String[] info = arr[i].split(",");

      // Check if the information is in the correct format
      if(info.length < 3)
      {
        continue;
      }

      int fromID;
      try
      {
        fromID = Integer.parseInt(info[0]);
      }
      catch(NumberFormatException e)
      {
        // Mis-split tuple (e.g. a title containing "),("): report and skip
        System.out.println(arr[i]);
        continue;
      }
      String namespace = info[1];

      // The title may itself contain commas: re-join everything after the namespace
      StringBuilder joined = new StringBuilder(info[2]);
      for(int j = 3; j < info.length; j++)
      {
        joined.append(',').append(info[j]);
      }//end: for(j)
      String title = joined.toString();

      if(title.length() == 0)
      {
        continue;
      }

      // Drop the surrounding SQL quotes; a title too short to be quoted is
      // reported and then used unchanged
      if(title.length() >= 2)
      {
        title = title.substring(1, title.length()-1);
      }
      else
      {
        System.out.println(arr[i]);
      }

      // Add the ID if it's in the needed namespace
      if(!namespace.equals("0"))
      {
        continue;
      }

      int fromVertex = vids.getVertex(fromID);
      int toID = tid.getID(title);
      int toVertex = -1;
      if(vids.isValidWikiID(toID))
      {
        toVertex = vids.getVertex(toID);
      }
      else if(rdl.isRedirectID(toID))
      {
        toVertex = vids.getVertex(rdl.redirectIDToValidID(toID));
      }

      /* Check valid from/to pairing */
      if(fromVertex > -1 && toVertex > -1)
      {
        /* Add to outbound graph */
        graph[fromVertex] = checkList(graph[fromVertex], toVertex);

        /* Add to inbound graph (extends the inbound list of the target) */
        igraph[toVertex] = checkList(igraph[toVertex], fromVertex);
      }//end: valid page insert
    }//end: for(i)
  }

  /**
   * Sorts an adjacency list in place and builds its uniform transition
   * weights (each entry is 1 / number of edges).
   * 
   * @param edges adjacency list of one vertex, or null if it has no edges
   * @return the weights, one per edge, or null when <code>edges</code> is null
   */
  private static float[] sortAndWeight(int[] edges)
  {
    if(edges == null)
    {
      return null;
    }

    Arrays.sort(edges);

    float[] trans = new float[edges.length];
    Arrays.fill(trans, (float) (1.0 / trans.length));
    return trans;
  }

  /**
   * Returns an adjacency list guaranteed to contain the given vertex.
   * 
   * @param currVertices current list, or null when the list is still empty
   * @param vertex vertex number to add
   * @return <code>currVertices</code> itself when it already holds the vertex,
   *         otherwise a new array with the vertex prepended
   */
  private static int[] checkList(int[] currVertices, int vertex) {
    if(currVertices == null)
    {
      return new int[] { vertex };
    }

    // vertex number already in list
    for(int i = 0; i < currVertices.length; i++)
    {
      if(currVertices[i] == vertex)
      {
        return currVertices;
      }
    }

    int [] newCurr = new int[currVertices.length+1];
    newCurr[0] = vertex;
    System.arraycopy(currVertices, 0, newCurr, 1, currVertices.length);
    return newCurr;
  }

}
